####Dan Hopkins
####Blogs Project
####R Code October 22 2006

### condor_submit_util -i bushXV030408.R -f -N -n 1
###set figure
fg <- "Bush"

llll <- 6

###sample size
MM <- 2501
sym <- c(5,7,8,9,10,11,12,13,14,15,16,17,18,20,21,22)
n.samp <- length(sym)
#sym <- 13

#########automated
library(VA)

###load data
dta <-  read.csv("/nfs/fs1/projects/poliblog/global.txt",sep=",",na=c("NA","NC","<NA>",""),header=T)

###subset figure of interest
dta2 <- dta[dta$figure==fg,]

rm(dta)

###CREATE PERMUTED CODINGS

dta2$jointcodeKW <- dta2$keneshiawashington.coding
dta2$jointcodeKW[dta2$keneshiawashington.coding %in% c(NA)] <- dta2$keneshiawashington.relev[dta2$keneshiawashington.coding %in% c(NA)]+2

dta2$jointcodeKC <- dta2$kcolton.coding
dta2$jointcodeKC[dta2$kcolton.coding %in% c(NA)] <- dta2$kcolton.relev[dta2$kcolton.coding %in% c(NA)]+2

dta2$jointcodeMK <- dta2$mknowles.coding
dta2$jointcodeMK[dta2$mknowles.coding %in% c(NA)] <- dta2$mknowles.relev[dta2$mknowles.coding %in% c(NA)]+2

dta2$jointcodeNH <- dta2$nickhayes.coding
dta2$jointcodeNH[dta2$nickhayes.coding %in% c(NA)] <- dta2$nickhayes.relev[dta2$nickhayes.coding %in% c(NA)]+2

dta2$jointcodeAP <- dta2$andrewprokop.coding
dta2$jointcodeAP[dta2$andrewprokop.coding %in% c(NA)] <- dta2$andrewprokop.relev[dta2$andrewprokop.coding %in% c(NA)]+2


###matrix of codings

codemat <- cbind(dta2$jointcodeAP,dta2$jointcodeNH,dta2$jointcodeMK,dta2$jointcodeKC,dta2$jointcodeKW)

###function to translate codings
conv.func <- function(mat){
  n<-dim(mat)[1]
  vec <- c()
  for(i in 1:n){
    cd <- mat[i,]
    if(sum(! cd %in% c(NA))==1)
      res <- cd[which(! cd %in% c(NA))]
    if(sum(! cd %in% c(NA))==0)   
      res <- NA
    
    if(any(c(-2,-1,0,1,2) %in% cd))
      res <- mean(cd[which(cd %in% c(-2,-1,0,1,2))])
    
    if( min(cd,na.rm=T)>2 & ! (sum(cd %in% c(NA))==5    ))
      res <- min(cd,na.rm=T)
    vec <- c(vec,res)
  }

  return(vec)
}

dta2$code1<-conv.func(codemat)

###RANDOMLY BREAK TIES
dta2$code2 <- NA

aa <- c(-.01,.01)
ss <- sample(aa,length(dta2$code1[dta2$code1 %in% c(-1.5,-.5,.5,1.5)]),replace=T)
dta2$code2 <- NA
dta2$code1[dta2$code1 %in% c(-1.5,-.5,.5,1.5)] <- dta2$code1[dta2$code1 %in% c(-1.5,-.5,.5,1.5)]+ss

dta2$code2[dta2$code1 <= -1.5 & ! dta2$code1 %in% c(NA)] <- -2
dta2$code2[dta2$code1 > -1.5 & dta2$code1 <= -.5 & ! dta2$code1 %in% c(NA)] <- -1
dta2$code2[dta2$code1 > -.5 & dta2$code1 <= .5 & ! dta2$code1 %in% c(NA)] <- 0
dta2$code2[dta2$code1 > .5 & dta2$code1 <= 1.5 & ! dta2$code1 %in% c(NA)] <- 1
dta2$code2[dta2$code1 > 1.5 & dta2$code1 < 2.5 & ! dta2$code1 %in% c(NA)] <- 2
dta2$code2[dta2$code1 > 2 & ! dta2$code1 %in% c(NA)] <- dta2$code1[dta2$code1 > 2 & ! dta2$code1 %in% c(NA)]

####create new codes
set.seed(llll)

### set number of generations of data
nsim <- 10
rmat11 <- matrix(NA,nsim,length(sym))
for(j in 1:nsim){
  rs <- sample(1:dim(dta2)[1],MM,replace=F)
  resmat <- tmat <- actmat <-matrix(NA,n.samp,7)
  for(ccc in 1:n.samp){
    nsy <- sym[ccc]
    dta3 <- dta2[,order(colnames(dta2))]
    first4 <- substr(colnames(dta3),start=1,stop=4)
    st<-min(which(first4=="WORD"))
    fn <-max(which(first4=="WORD")) 
    nwords <-fn - st + 1

    dta.BUSH <- as.data.frame(1*(dta3[,st:fn]>0))
    dta.BUSH$jointcode <- dta3$code2
    dta.BUSHf <- na.omit(dta.BUSH)

    sttxt <- colnames(dta.BUSHf)[1]
    fntxt <- colnames(dta.BUSHf)[nwords]
    nn<-apply(dta.BUSHf[,1:nwords],2,sum)/dim(dta.BUSHf)[1]
    nn2 <- nn
    nn2[nn>.99 | nn < .01] <- 0
    wts <- nn2/sum(nn2)
    train <- dta.BUSHf[rs,]
  
####remove half of -2
    test <- dta.BUSHf
#    nnn <- dim(test)[1]
#    vec11 <- which(test$jointcode==-2)
#    rs2 <- sample(1:length(vec11),round(length(vec11)/2,digits=0),replace=F)
#    vec22 <- vec11[rs2]
    testA <- test #[-vec22,]


    v1<-apply(train,2,sd)
    remov1<-which(v1==0)

    v2<-apply(testA,2,sd)
    remov2<-which(v2==0)
    if(sum(c(remov1,remov2))>0){
      train2 <- train[,-c(remov1,remov2)]
      test2 <- testA[,-c(remov1,remov2)]
      wts2 <- wts[-c(remov1,remov2)]
    }else{
      train2 <- train
      test2 <- test
      wts2 <- wts
    }
    
    txt <- paste("vout1<-va(cbind(",sttxt,"+...+",fntxt,")~jointcode,data=list(train2,test2),nsymp=nsy,n.subset=1,prob.wt=wts2,boot.se=F,printit=F)",sep="")
    eval(parse(text=txt))
    resmat[ccc,] <- vout1$est.CSMF
    actmat[ccc,] <- table(test2$jointcode)/sum(table(test2$jointcode))
    actmat[ccc,2] <- round(mean(1*(test2$jointcode==-1)),digits=6)
    actmat[ccc,3] <- round(mean(1*(test2$jointcode==0)),digits=6)
    actmat[ccc,4] <- round(mean(1*(test2$jointcode==1)),digits=6)
    actmat[ccc,5] <- round(mean(1*(test2$jointcode==2)),digits=6)
    actmat[ccc,6] <- round(mean(1*(test2$jointcode==3)),digits=6)
    actmat[ccc,7] <- round(mean(1*(test2$jointcode==4)),digits=6) 
  }
  rmat11[j,] <- apply(abs(resmat-actmat),1,sum)
}
save(rmat11,actmat,resmat,sym,vout1,file="/nfs/fs1/projects/poliblog/replication/blogs/BushXV030708.Rdata")


